Initially when I began creating these graphs I partitioned the data into two groups, “below 11 years old reading age” and “not below 11 years old reading age”. Thus, the following graphics showcase the absolute values and then the proportions for each respective year.
total_NGRT %>%
group_by(below_11_years_old, Year) %>%
summarise(total = n()) %>%
ungroup() %>%
ggplot(aes(x = Year, y = total, fill = fct_rev(below_11_years_old)))+
geom_bar(stat = "identity", position = "dodge", color = "black")+
theme_bw()+
labs(fill = "Below 11 \nYears Old",
title = "Amount of Students Below a 11 Year Old Reading Age")
total_NGRT %>%
group_by(below_11_years_old, Year) %>%
summarise(total = n()) %>%
ungroup() %>%
group_by(Year) %>%
mutate(percent = total/sum(total)*100) %>%
ggplot(aes(x = Year, y = percent, fill = fct_rev(below_11_years_old)))+
geom_bar(stat = "identity", position = "stack", color = "black")+
scale_y_continuous(name="Percent (%)", limits=c(0, 100), breaks = seq(0,100,10))+
theme_bw()+
labs(fill = "Below 11 \nYears Old",
title = "Amount of Students Below 11 Years Old Reading Age")
total_NGRT %>%
group_by(below_11_years_old, Year, Gender) %>%
summarise(total = n()) %>%
ungroup() %>%
mutate(Gender = as.factor(Gender)) %>%
ggplot(aes(x = Year, y = total, fill = fct_rev(below_11_years_old)))+
geom_bar(stat = "identity", position = "dodge", color = "black")+
scale_y_continuous(name="Frequency", limits=c(0, 60), breaks = seq(0,60,10))+
facet_wrap(~Gender)+
theme_bw()+
labs(fill = "Below 11 \nYears Old",
title = "Amount of Students Below 11 Years Old Reading Age")
total_NGRT %>%
group_by(below_11_years_old, Year, Gender) %>%
summarise(total = n()) %>%
ungroup() %>%
group_by(Year,Gender) %>%
mutate(percent = total/sum(total)*100) %>%
ungroup() %>%
mutate(Gender = as.factor(Gender)) %>%
ggplot(aes(x = Year, y = percent, fill = fct_rev(below_11_years_old)))+
geom_bar(stat = "identity", color = "black")+
scale_y_continuous(name="Percent (%)", limits=c(0, 100), breaks = seq(0,100,10))+
facet_wrap(~Gender)+
theme_bw()+
labs(fill = "Below 11 \nYears Old",
title = "Amount of Students Below 11 Years Old Reading Age")
To enrich the NRGT data, I incorporated the Dynamic Student List Report from OneSchool to identify demographic information for each observation of the reading dataset. This was done merging each dataset together using EQ_ID. The information I was interested in was “origin school” and if any trends were present when students were grouped by their primary school.
Not all students that took the NGRT test in previous years, 2018 - 2020, were enrolled at Tully SHS during the generation of this report. Despite this, students with a “Left” enrolment status were still used in the following graphics. I also observed an error in the Dynamic Student List data, in that some students had “Tully State High School” as their origin school. Only 5 students were observed with this error and these students were placed into “Tully State School” category after each student was manually inspected using OneSchool.
total_NGRT_school_data %>%
filter(is.na(Student_Name) == F) %>%
group_by(Origin_School, below_11_years_old) %>%
summarise(value = n()) %>%
ungroup() %>%
mutate(Origin_School = ifelse(value<4, "Small Schools (< 4 Students)",Origin_School)) %>%
group_by(Origin_School, below_11_years_old) %>%
summarise(total_school_amount = sum(value)) %>%
ungroup() %>%
ggplot(aes(x = fct_reorder(Origin_School, -total_school_amount), y = total_school_amount, fill = fct_rev(below_11_years_old)))+
geom_bar(stat = "identity", color = "black")+
theme_bw()+
theme(axis.text.x = element_text(angle=60, hjust=1))+
scale_y_continuous(name="Frequency", limits=c(0, 200), breaks = seq(0,200,20))+
labs(fill = "Below Reading\nAge",
x = "Primary School",
title = "Distribution of Students Below a 11 Year Old Reading Age")
total_NGRT_school_data %>%
filter(is.na(Student_Name) == F) %>%
group_by(Origin_School, below_11_years_old) %>%
summarise(value = n()) %>%
ungroup() %>%
mutate(Origin_School = ifelse(value<4, "Small Schools (< 4 Students)",Origin_School)) %>%
group_by(Origin_School, below_11_years_old) %>%
summarise(total_school_amount = sum(value)) %>%
ungroup() %>%
group_by(Origin_School) %>%
mutate(percent = round(total_school_amount/sum(total_school_amount)*100,2)) %>%
ungroup() %>%
ggplot(aes(x = fct_reorder(Origin_School, -total_school_amount), y = percent, fill = fct_rev(below_11_years_old)))+
geom_bar(stat = "identity", color = "black")+
theme_bw()+
theme(axis.text.x = element_text(angle=60, hjust=1))+
scale_y_continuous(name="Percent (%)", limits=c(0, 100), breaks = seq(0,100,10))+
labs(fill = "Below Reading\nAge",
x = "Primary School",
title = "Distribution of Students Below a 11 Year Old Reading Age")
total_NGRT_school_data %>%
filter(is.na(Student_Name) == F) %>%
group_by(Origin_School, below_11_years_old, Year.x) %>%
summarise(value = n()) %>%
ungroup() %>%
mutate(Origin_School = ifelse(value<2, "Small Schools (< 2 Students)",Origin_School)) %>%
group_by(Origin_School, below_11_years_old, Year.x) %>%
summarise(total_school_amount = sum(value)) %>%
ungroup() %>%
ggplot(aes(x = fct_reorder(Origin_School, -total_school_amount), y = total_school_amount, fill = fct_rev(below_11_years_old)))+
geom_bar(stat = "identity", color = "black")+
facet_grid(Year.x~., scales = "free")+
theme_bw()+
theme(axis.text.x = element_text(angle=60, hjust=1), legend.position = "none")+
scale_y_continuous(name="Frequency", limits=c(0, 60), breaks = seq(0,60,10))+
labs(x = "Primary School",
title = "Distribution of Students Below a 11 Year Old Reading Age")
total_NGRT_school_data %>%
filter(is.na(Student_Name) == F) %>%
group_by(Origin_School, below_11_years_old, Year.x) %>%
summarise(value = n()) %>%
ungroup() %>%
mutate(Origin_School = ifelse(value<2, "Small Schools (< 2 Students)",Origin_School)) %>%
group_by(Origin_School, below_11_years_old, Year.x) %>%
summarise(total_school_amount = sum(value)) %>%
ungroup() %>%
group_by(Origin_School, Year.x) %>%
mutate(percent = round(total_school_amount/sum(total_school_amount)*100,2))%>%
ggplot(aes(x = fct_reorder(Origin_School, -total_school_amount), y = percent, fill = fct_rev(below_11_years_old)))+
geom_bar(stat = "identity", color = "black")+
facet_grid(Year.x~., scales = "free")+
theme_bw()+
theme(axis.text.x = element_text(angle=60, hjust=1), legend.position = "none")+
scale_y_continuous(name="Percent (%)", limits=c(0, 100), breaks = seq(0,100,10))+
labs(x = "Primary School",
title = "Proportion of Students Below 11 Year Old Reading Age",
subtitle = "Last 4 Years")
The NGRT dataset provided to the school using a 95% confidence interval. This means that if the test was repeated 100 times, student reading agaes would flucuate between the upper and lower CI interval 95 times (95% accuracy rating). For this reason, I only used the Overall Reading Age was used to inspect performance of the students over the last 4 years. However, the dataset that was provided by the school was not in an ideal formate and utilised a 12 point scale between each year from 10:00 to 10:11.
To inspect distribution of the reading age by a variety of factors, I needed to transform this categorical scale to a numeric scale. To achieve this, I separated each reading age into years and months using the “:”, added 1 to each of the months to get rid of the zero values, remove the plus signs (17:00+) from high reading ages, divded months by 13 to get a decimal value and added that decimal value to the years column. The reason, I did not divide by 13 was becuase when I added 1 to to a value of 11 and then divided by 12 it returned a value of 1 thus, resulting in an incorrect reading age (increasing reading age by 1 and not 0.92). Also all student above 17 years old were grouped together because the upper limiter was not specified.
all_metrics <- rbind(metrics_2018, metrics_2019, metrics_2020, metrics_2021)
rownames(all_metrics) <- c("2018","2019","2020","2021")
all_metrics$Max <- "17:00+"
all_metrics<- as.data.frame(t(all_metrics))
knitr::kable(all_metrics, align = "lccrr", digits = 2)
| 2018 | 2019 | 2020 | 2021 | |
|---|---|---|---|---|
| Max | 17:00+ | 17:00+ | 17:00+ | 17:00+ |
| Q3 | 14.308 | 14.077 | 14.308 | 13.692 |
| Median | 12.154 | 12.923 | 12.154 | 11.615 |
| Q1 | 10.30800 | 10.92300 | 9.25025 | 9.42350 |
| Min | 5.308 | 5.923 | 5.154 | 5.077 |
total_NGRT_school_data %>%
ggplot(aes(x = Year.x, y = Years_as_numeric, fill = Year.x))+
geom_boxplot()+
scale_y_continuous(name = "Reading Age", limits = c(5.0,17.5), breaks = seq(5.0,17.5,0.5))+
theme_bw()
hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2018")], xaxt = "n", main = "Distribution of Reading Age for 2018", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))
hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2019")], xaxt = "n", main = "Distribution of Reading Age for 2019", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))
hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2020")], xaxt = "n", main = "Distribution of Reading Age for 2020", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))
hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2021")], xaxt = "n", main = "Distribution of Reading Age for 2021", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))
hist(total_NGRT_school_data$Years_as_numeric, xaxt = "n", main = "Distribution of Reading Age", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))
NGRT_year <- total_NGRT_school_data %>%
filter(is.na(Origin_School) == F) %>%
group_by(Origin_School) %>%
select(Origin_School, Years_as_numeric, Year.x) %>%
mutate(school_count = n(),
Origin_School = ifelse(school_count <= 5, "Small School (<=5)", Origin_School)) %>%
ungroup()
Years_dist<- NGRT_year %>% ggplot(aes(Years_as_numeric, fill =Year.x))+
facet_wrap(~Origin_School, scales = "free")+
geom_density(alpha = .5,color = "black")+
theme(axis.text.x = element_text(angle=60, hjust=1))+
theme_bw()
ggplotly(Years_dist)
total_NGRT_school_data %>%
filter(is.na(Origin_School) == F) %>%
group_by(Origin_School) %>%
select(Origin_School, Years_as_numeric, Gender) %>%
mutate(school_count = n(),
Origin_School = ifelse(school_count <= 5, "Small School (<=5)", Origin_School)) %>%
ungroup() %>%
ggplot(aes(Years_as_numeric, color = fct_rev(Gender)))+
facet_wrap(~Origin_School, scales = "free")+
geom_density()+
theme_bw()+
labs(title = "Distribution of Gender per Primary School",
fill = "Gender")